{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.impute import SimpleImputer\n",
    "from numpy import loadtxt\n",
    "from xgboost import XGBClassifier\n",
    "from matplotlib import pyplot\n",
    "from xgboost import plot_importance\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.linear_model import Lasso\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_excel('/home/yx/3090/project/P_prediction/Data/肺部并发症预测/肺部并发症分析数据-日期排序(1).xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_X = df.drop([\n",
    "'Unnamed: 0', '手麻手术排程ID','住院号','身高:cm','体重:kg','屏气试验_不适用','呼吸困难_与体位的关系',\n",
    "'戒烟','一秒用力呼气容积（FEV1）/用力肺活量(FVC)','一秒用力呼气容积（FEV1）/预计值',\n",
    "'血气分析_FiO2','颅内出血_选择','颅内高压','脑疝','是否精神认知异常','神经肌肉系统疾病','甲状腺功能异常_选择','治疗',\n",
    "'糖尿病_血糖水平_前三项','糖尿病_血糖水平_后两项','嗜铬细胞瘤','腹水','分期','透析','既往困难通气史','既往困难插管史',\n",
    "'张口低于二横指_选择','下颌前伸','颈部活动<90°_选择','睡眠呼吸暂停综合征','呼吸道梗阻','纵膈病史选择','呼吸道梗阻_选择',\n",
    "'术后恶心呕吐','华法林停药时间','阿司匹林停药时间','氯吡格雷停药时间','肿瘤治疗','手术类型_血管内介入手术','手术风险评估_预计失血量',\n",
    "'华西围术期风险评分','通气困难评分','插管困难评分','心血管不良事件风险','肺部并发症风险','卒中风险','术后谵妄风险',\n",
    "'急性肾损伤风险','术后恶心呕吐风险','术后中重度疼痛','心功能_不适用','运动当量_不适用','心电图检查结果选项','辅助性检查_LV',\n",
    "'辅助性检查_RV','辅助性检查_SV','辅助性检查_EF','辅助性检查_肺动脉压','高血压利血平停药时间','缺血性心脏病_检查','是否患心脏瓣膜病',\n",
    "'先天性心脏病_选择','扩张性心肌病','扩张性心肌病_选择','肥厚型心肌病_选择','是否低危型','是否中危型','是否高危型',\n",
    "'心源性晕厥史','安置起搏器_选择','主动脉夹层_选择','是否动脉疾病','是否静脉疾病','是否有心包疾病','出血_近1周消化道出血病史_选择_下消化道',\n",
    "'出血_近1周消化道出血病史_选择_上消化道','胃食管反流','消化性溃疡','戒酒时间','其他消化系统疾病','是否凝血功能异常','是否其他血液疾病',\n",
    "'是否过敏','是否免疫功能障碍','是否自身免疫性疾病','访视类型','手术开始日期','手术结束日期','促甲状腺激素受体抗体','尿蛋白定量','抗甲状腺过氧化物酶抗体',\n",
    "'甲状腺球蛋白','空腹C肽','血氨','血糖','酮体定性','餐后2小时C肽','心肌缺血','心脏骤停','心力衰竭','死亡','高危心律失常','卒中',\n",
    "'严重心血管不良','急性肾损伤','肺不张','肺水肿','气胸','液（血）胸','肺部感染','肺栓塞','拔管延迟或困难','呼吸衰竭',\n",
    "'术后谵妄','中重度疼痛','术后恶心呕吐_结局','低氧血症_只在旧版数据','急性呼吸窘迫综合征_只在旧版数据','呼吸抑制_只在新版数据',\n",
    "'患者姓名','诊断','手术名称','手术开始时间_术中','手术结束时间_术中','麻醉开始时间_术中','麻醉结束时间_术中','麻醉时长（分钟）',\n",
    "'人工气道类型','手术日期与哮喘上次发作日期时间差','手术日期_充血性心力衰竭上次发作日期','拟行手术','手术风险评估_预计手术时间'\n",
    "], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>性别</th>\n",
       "      <th>BMI</th>\n",
       "      <th>收缩压</th>\n",
       "      <th>舒张压</th>\n",
       "      <th>呼吸</th>\n",
       "      <th>心率</th>\n",
       "      <th>体温</th>\n",
       "      <th>是否使用活性药物</th>\n",
       "      <th>急诊/择期</th>\n",
       "      <th>一般情况</th>\n",
       "      <th>...</th>\n",
       "      <th>门冬氨酸氨基转移酶</th>\n",
       "      <th>间接胆红素</th>\n",
       "      <th>阴离子间隙</th>\n",
       "      <th>降钙素原</th>\n",
       "      <th>高密度脂蛋白</th>\n",
       "      <th>肺部并发症</th>\n",
       "      <th>年龄_术中</th>\n",
       "      <th>ASA分级</th>\n",
       "      <th>手术时长（分钟）</th>\n",
       "      <th>出血量</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>25.203981</td>\n",
       "      <td>129.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>36.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>10.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>16.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.41</td>\n",
       "      <td>0</td>\n",
       "      <td>67</td>\n",
       "      <td>3</td>\n",
       "      <td>230.316667</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>26.757812</td>\n",
       "      <td>123.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>54.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>16.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.98</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>3</td>\n",
       "      <td>288.216667</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>20.213384</td>\n",
       "      <td>114.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>35.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>19.2</td>\n",
       "      <td>0.10</td>\n",
       "      <td>1.15</td>\n",
       "      <td>0</td>\n",
       "      <td>70</td>\n",
       "      <td>3</td>\n",
       "      <td>70.033333</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>23.051755</td>\n",
       "      <td>117.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>36.0</td>\n",
       "      <td>17.8</td>\n",
       "      <td>19.1</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.44</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>2</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>15.943878</td>\n",
       "      <td>124.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>36.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>17.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.56</td>\n",
       "      <td>0</td>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>34.983333</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17352</th>\n",
       "      <td>2</td>\n",
       "      <td>22.558610</td>\n",
       "      <td>135.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>36.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>14.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>14.6</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.44</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>2</td>\n",
       "      <td>399.333333</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17353</th>\n",
       "      <td>1</td>\n",
       "      <td>22.761468</td>\n",
       "      <td>157.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>36.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.9</td>\n",
       "      <td>14.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.01</td>\n",
       "      <td>0</td>\n",
       "      <td>75</td>\n",
       "      <td>3</td>\n",
       "      <td>204.950000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17354</th>\n",
       "      <td>2</td>\n",
       "      <td>27.005131</td>\n",
       "      <td>125.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>13.3</td>\n",
       "      <td>22.7</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.10</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>3</td>\n",
       "      <td>89.333333</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17355</th>\n",
       "      <td>1</td>\n",
       "      <td>29.136316</td>\n",
       "      <td>132.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>35.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>17.2</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>3</td>\n",
       "      <td>60.750000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17356</th>\n",
       "      <td>1</td>\n",
       "      <td>23.140496</td>\n",
       "      <td>125.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>36.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>18.4</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.15</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>2</td>\n",
       "      <td>43.950000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>17357 rows × 145 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       性别        BMI    收缩压   舒张压    呼吸     心率    体温  是否使用活性药物  急诊/择期  一般情况  \\\n",
       "0       2  25.203981  129.0  81.0  20.0  110.0  36.4         0      0   2.0   \n",
       "1       1  26.757812  123.0  79.0  20.0   74.0  36.5         0      0   1.0   \n",
       "2       1  20.213384  114.0  66.0  20.0   50.0  35.7         0      0   1.0   \n",
       "3       1  23.051755  117.0  77.0  20.0   78.0  36.5         0      0   2.0   \n",
       "4       1  15.943878  124.0  70.0  20.0   80.0  36.7         0      0   2.0   \n",
       "...    ..        ...    ...   ...   ...    ...   ...       ...    ...   ...   \n",
       "17352   2  22.558610  135.0  69.0  18.0   91.0  36.3         0      0   1.0   \n",
       "17353   1  22.761468  157.0  71.0  20.0   75.0  36.3         0      0   1.0   \n",
       "17354   2  27.005131  125.0  74.0  20.0   84.0  36.5         0      0   2.0   \n",
       "17355   1  29.136316  132.0  71.0  14.0   82.0  36.5         0      0   1.0   \n",
       "17356   1  23.140496  125.0  84.0  18.0   80.0  36.2         0      0   1.0   \n",
       "\n",
       "       ...  门冬氨酸氨基转移酶  间接胆红素  阴离子间隙  降钙素原  高密度脂蛋白  肺部并发症  年龄_术中  ASA分级  \\\n",
       "0      ...       10.0    3.5   16.6   NaN    1.41      0     67      3   \n",
       "1      ...       54.0   13.0   16.4   NaN    0.98      0     66      3   \n",
       "2      ...       60.0    4.5   19.2  0.10    1.15      0     70      3   \n",
       "3      ...       36.0   17.8   19.1   NaN    1.44      0     82      2   \n",
       "4      ...       25.0    6.8   17.0   NaN    1.56      0     73      2   \n",
       "...    ...        ...    ...    ...   ...     ...    ...    ...    ...   \n",
       "17352  ...       14.0    7.8   14.6   NaN    1.44      0     69      2   \n",
       "17353  ...       13.0    4.9   14.0   NaN    1.01      0     75      3   \n",
       "17354  ...       20.0   13.3   22.7   NaN    1.10      0     82      3   \n",
       "17355  ...       35.0    5.1   17.2   NaN    0.73      0     69      3   \n",
       "17356  ...       17.0    6.2   18.4   NaN    1.15      0     66      2   \n",
       "\n",
       "         手术时长（分钟）  出血量  \n",
       "0      230.316667  NaN  \n",
       "1      288.216667  NaN  \n",
       "2       70.033333  NaN  \n",
       "3      100.000000  NaN  \n",
       "4       34.983333  NaN  \n",
       "...           ...  ...  \n",
       "17352  399.333333  NaN  \n",
       "17353  204.950000  NaN  \n",
       "17354   89.333333  NaN  \n",
       "17355   60.750000  NaN  \n",
       "17356   43.950000  NaN  \n",
       "\n",
       "[17357 rows x 145 columns]"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = df_X.pop('术前诊断(head_pd)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_X_qs = df_X.drop([\n",
    "'反三碘甲状腺原氨酸','25-羟基维生素D','抗甲状腺球蛋白抗体','肿瘤坏死因子α','糖化血红蛋白A1c',\n",
    "'尿钠素','降钙素原','三碘甲状腺原氨酸','甲状腺素','游离三碘甲状腺原氨酸','促甲状腺刺激激素',\n",
    "'氧分压','碳酸氢根','缓冲碱','酸碱度','氧合血红蛋白浓度','二氧化碳分压','全血乳酸',\n",
    "'全血碱剩余','游离甲状腺素','血沉','白细胞介素6','肌酸激酶同功酶MB质量','肌红蛋白','肌钙蛋白-T',\n",
    "'纤维蛋白及纤维蛋白原降解产物','D-二聚体','C-反应蛋白','上皮细胞','尿胆原定性','尿胆红素定性',\n",
    "'尿葡萄糖','尿蛋白定性','比重'\n",
    "], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_X_qs.to_pickle('/home/yx/3090/project/P_prediction/Data/肺部并发症预测/data_time.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_null_rate(df_X):\n",
    "    data = pd.DataFrame(((df_X.isnull().sum())/df_X.shape[0]).sort_values(ascending=False).map(lambda x:\"{:.2%}\".format(x)))\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        0\n",
       "1        0\n",
       "2        0\n",
       "3        1\n",
       "4        0\n",
       "        ..\n",
       "17367    0\n",
       "17368    0\n",
       "17369    0\n",
       "17370    0\n",
       "17371    0\n",
       "Name: 肺部并发症, Length: 17372, dtype: int64"
      ]
     },
     "execution_count": 78,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 79,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = []\n",
    "for i in df_X_qs['手术时长（分钟）']:\n",
    "    if i< 120 :\n",
    "        data.append(1)\n",
    "    elif i < 240:\n",
    "        data.append(2)\n",
    "    else:\n",
    "        data.append(3)\n",
    "df_X_qs['手术时间_分级'] = data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>性别</th>\n",
       "      <th>BMI</th>\n",
       "      <th>收缩压</th>\n",
       "      <th>舒张压</th>\n",
       "      <th>呼吸</th>\n",
       "      <th>心率</th>\n",
       "      <th>体温</th>\n",
       "      <th>是否使用活性药物</th>\n",
       "      <th>急诊/择期</th>\n",
       "      <th>一般情况</th>\n",
       "      <th>...</th>\n",
       "      <th>镁</th>\n",
       "      <th>门冬氨酸氨基转移酶</th>\n",
       "      <th>间接胆红素</th>\n",
       "      <th>阴离子间隙</th>\n",
       "      <th>高密度脂蛋白</th>\n",
       "      <th>年龄_术中</th>\n",
       "      <th>ASA分级</th>\n",
       "      <th>手术时长（分钟）</th>\n",
       "      <th>出血量</th>\n",
       "      <th>手术时间_分级</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>22.100290</td>\n",
       "      <td>144.0</td>\n",
       "      <td>85.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.82</td>\n",
       "      <td>16.0</td>\n",
       "      <td>9.4</td>\n",
       "      <td>19.6</td>\n",
       "      <td>1.59</td>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>160.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>21.453287</td>\n",
       "      <td>127.0</td>\n",
       "      <td>73.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>76.0</td>\n",
       "      <td>36.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.84</td>\n",
       "      <td>30.0</td>\n",
       "      <td>12.3</td>\n",
       "      <td>20.1</td>\n",
       "      <td>1.63</td>\n",
       "      <td>71</td>\n",
       "      <td>3</td>\n",
       "      <td>170.050000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>22.806017</td>\n",
       "      <td>128.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.91</td>\n",
       "      <td>24.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>22.2</td>\n",
       "      <td>1.72</td>\n",
       "      <td>77</td>\n",
       "      <td>2</td>\n",
       "      <td>125.666667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>24.973985</td>\n",
       "      <td>158.0</td>\n",
       "      <td>97.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>87.0</td>\n",
       "      <td>36.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.95</td>\n",
       "      <td>23.0</td>\n",
       "      <td>5.3</td>\n",
       "      <td>19.9</td>\n",
       "      <td>1.42</td>\n",
       "      <td>72</td>\n",
       "      <td>3</td>\n",
       "      <td>144.833333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>24.444444</td>\n",
       "      <td>126.0</td>\n",
       "      <td>90.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>96.0</td>\n",
       "      <td>36.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.89</td>\n",
       "      <td>105.0</td>\n",
       "      <td>9.1</td>\n",
       "      <td>18.0</td>\n",
       "      <td>1.04</td>\n",
       "      <td>66</td>\n",
       "      <td>2</td>\n",
       "      <td>285.100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17367</th>\n",
       "      <td>2</td>\n",
       "      <td>27.005131</td>\n",
       "      <td>125.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.85</td>\n",
       "      <td>20.0</td>\n",
       "      <td>13.3</td>\n",
       "      <td>22.7</td>\n",
       "      <td>1.10</td>\n",
       "      <td>82</td>\n",
       "      <td>3</td>\n",
       "      <td>89.333333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17368</th>\n",
       "      <td>1</td>\n",
       "      <td>25.816630</td>\n",
       "      <td>126.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>88.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.88</td>\n",
       "      <td>17.0</td>\n",
       "      <td>11.2</td>\n",
       "      <td>19.8</td>\n",
       "      <td>1.48</td>\n",
       "      <td>79</td>\n",
       "      <td>3</td>\n",
       "      <td>167.816667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17369</th>\n",
       "      <td>2</td>\n",
       "      <td>24.449375</td>\n",
       "      <td>119.0</td>\n",
       "      <td>67.0</td>\n",
       "      <td>17.0</td>\n",
       "      <td>60.0</td>\n",
       "      <td>36.8</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.99</td>\n",
       "      <td>19.0</td>\n",
       "      <td>13.5</td>\n",
       "      <td>6.3</td>\n",
       "      <td>1.34</td>\n",
       "      <td>66</td>\n",
       "      <td>2</td>\n",
       "      <td>44.666667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17370</th>\n",
       "      <td>1</td>\n",
       "      <td>23.140496</td>\n",
       "      <td>125.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>36.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.85</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>18.4</td>\n",
       "      <td>1.15</td>\n",
       "      <td>66</td>\n",
       "      <td>2</td>\n",
       "      <td>43.950000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17371</th>\n",
       "      <td>1</td>\n",
       "      <td>22.761468</td>\n",
       "      <td>157.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>36.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0.86</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.9</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.01</td>\n",
       "      <td>75</td>\n",
       "      <td>3</td>\n",
       "      <td>204.950000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>17372 rows × 110 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       性别        BMI    收缩压   舒张压    呼吸    心率    体温  是否使用活性药物  急诊/择期  一般情况  \\\n",
       "0       1  22.100290  144.0  85.0  20.0  80.0  36.0         0      0   1.0   \n",
       "1       1  21.453287  127.0  73.0  20.0  76.0  36.7         0      0   1.0   \n",
       "2       2  22.806017  128.0  78.0  20.0  78.0  36.5         0      0   1.0   \n",
       "3       2  24.973985  158.0  97.0  20.0  87.0  36.0         0      0   1.0   \n",
       "4       2  24.444444  126.0  90.0  20.0  96.0  36.7         0      0   1.0   \n",
       "...    ..        ...    ...   ...   ...   ...   ...       ...    ...   ...   \n",
       "17367   2  27.005131  125.0  74.0  20.0  84.0  36.5         0      0   2.0   \n",
       "17368   1  25.816630  126.0  78.0  20.0  88.0  36.5         0      0   1.0   \n",
       "17369   2  24.449375  119.0  67.0  17.0  60.0  36.8         0      0   2.0   \n",
       "17370   1  23.140496  125.0  84.0  18.0  80.0  36.2         0      0   1.0   \n",
       "17371   1  22.761468  157.0  71.0  20.0  75.0  36.3         0      0   1.0   \n",
       "\n",
       "       ...     镁  门冬氨酸氨基转移酶  间接胆红素  阴离子间隙  高密度脂蛋白  年龄_术中  ASA分级    手术时长（分钟）  \\\n",
       "0      ...  0.82       16.0    9.4   19.6    1.59     73      2  160.100000   \n",
       "1      ...  0.84       30.0   12.3   20.1    1.63     71      3  170.050000   \n",
       "2      ...  0.91       24.0   14.0   22.2    1.72     77      2  125.666667   \n",
       "3      ...  0.95       23.0    5.3   19.9    1.42     72      3  144.833333   \n",
       "4      ...  0.89      105.0    9.1   18.0    1.04     66      2  285.100000   \n",
       "...    ...   ...        ...    ...    ...     ...    ...    ...         ...   \n",
       "17367  ...  0.85       20.0   13.3   22.7    1.10     82      3   89.333333   \n",
       "17368  ...  0.88       17.0   11.2   19.8    1.48     79      3  167.816667   \n",
       "17369  ...  0.99       19.0   13.5    6.3    1.34     66      2   44.666667   \n",
       "17370  ...  0.85       17.0    6.2   18.4    1.15     66      2   43.950000   \n",
       "17371  ...  0.86       13.0    4.9   14.0    1.01     75      3  204.950000   \n",
       "\n",
       "       出血量  手术时间_分级  \n",
       "0      NaN        2  \n",
       "1      NaN        2  \n",
       "2      NaN        2  \n",
       "3      NaN        2  \n",
       "4      NaN        3  \n",
       "...    ...      ...  \n",
       "17367  NaN        1  \n",
       "17368  NaN        2  \n",
       "17369  NaN        1  \n",
       "17370  NaN        1  \n",
       "17371  NaN        2  \n",
       "\n",
       "[17372 rows x 110 columns]"
      ]
     },
     "execution_count": 80,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_X_qs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "def is_number(s):\n",
    "    try:\n",
    "        float(s)\n",
    "        return True\n",
    "    except ValueError:\n",
    "        pass\n",
    " \n",
    "    try:\n",
    "        import unicodedata\n",
    "        \n",
    "        unicodedata.numeric(s)\n",
    "        return True\n",
    "    except (TypeError, ValueError):\n",
    "        pass\n",
    " \n",
    "    return False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def remove_outliers(df):\n",
    "    for col in df.columns:\n",
    "        for value in df[col]:\n",
    "            if type(value) is str:\n",
    "                if is_number(value):\n",
    "                    continue\n",
    "                else:\n",
    "                    df.loc[df[col] == value,[col]] = np.NaN\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_X_qs_remove = remove_outliers(df_X_qs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_X_qs_remove['术前诊断'] = text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>性别</th>\n",
       "      <th>BMI</th>\n",
       "      <th>收缩压</th>\n",
       "      <th>舒张压</th>\n",
       "      <th>呼吸</th>\n",
       "      <th>心率</th>\n",
       "      <th>体温</th>\n",
       "      <th>是否使用活性药物</th>\n",
       "      <th>急诊/择期</th>\n",
       "      <th>一般情况</th>\n",
       "      <th>...</th>\n",
       "      <th>门冬氨酸氨基转移酶</th>\n",
       "      <th>间接胆红素</th>\n",
       "      <th>阴离子间隙</th>\n",
       "      <th>高密度脂蛋白</th>\n",
       "      <th>肺部并发症</th>\n",
       "      <th>年龄_术中</th>\n",
       "      <th>ASA分级</th>\n",
       "      <th>手术时长（分钟）</th>\n",
       "      <th>出血量</th>\n",
       "      <th>术前诊断</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>25.203981</td>\n",
       "      <td>129.0</td>\n",
       "      <td>81.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>110.0</td>\n",
       "      <td>36.4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>10.0</td>\n",
       "      <td>3.5</td>\n",
       "      <td>16.6</td>\n",
       "      <td>1.41</td>\n",
       "      <td>0</td>\n",
       "      <td>67</td>\n",
       "      <td>3</td>\n",
       "      <td>230.316667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>2型糖尿病</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>26.757812</td>\n",
       "      <td>123.0</td>\n",
       "      <td>79.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>54.0</td>\n",
       "      <td>13.0</td>\n",
       "      <td>16.4</td>\n",
       "      <td>0.98</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>3</td>\n",
       "      <td>288.216667</td>\n",
       "      <td>NaN</td>\n",
       "      <td>胃窦恶性肿瘤</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>20.213384</td>\n",
       "      <td>114.0</td>\n",
       "      <td>66.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>50.0</td>\n",
       "      <td>35.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>4.5</td>\n",
       "      <td>19.2</td>\n",
       "      <td>1.15</td>\n",
       "      <td>0</td>\n",
       "      <td>70</td>\n",
       "      <td>3</td>\n",
       "      <td>70.033333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>肝细胞癌</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>23.051755</td>\n",
       "      <td>117.0</td>\n",
       "      <td>77.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>78.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>36.0</td>\n",
       "      <td>17.8</td>\n",
       "      <td>19.1</td>\n",
       "      <td>1.44</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>2</td>\n",
       "      <td>100.000000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>喉肿物</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>15.943878</td>\n",
       "      <td>124.0</td>\n",
       "      <td>70.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>36.7</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>25.0</td>\n",
       "      <td>6.8</td>\n",
       "      <td>17.0</td>\n",
       "      <td>1.56</td>\n",
       "      <td>0</td>\n",
       "      <td>73</td>\n",
       "      <td>2</td>\n",
       "      <td>34.983333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>胃体恶性肿瘤</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17352</th>\n",
       "      <td>2</td>\n",
       "      <td>22.558610</td>\n",
       "      <td>135.0</td>\n",
       "      <td>69.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>91.0</td>\n",
       "      <td>36.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>14.0</td>\n",
       "      <td>7.8</td>\n",
       "      <td>14.6</td>\n",
       "      <td>1.44</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>2</td>\n",
       "      <td>399.333333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.左肺下叶结节：曲霉菌？ 2.支气管扩张伴感染 3.肺气肿 4.高血压病</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17353</th>\n",
       "      <td>1</td>\n",
       "      <td>22.761468</td>\n",
       "      <td>157.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>75.0</td>\n",
       "      <td>36.3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>13.0</td>\n",
       "      <td>4.9</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.01</td>\n",
       "      <td>0</td>\n",
       "      <td>75</td>\n",
       "      <td>3</td>\n",
       "      <td>204.950000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1:右肾盂癌，高血压，糖尿病</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17354</th>\n",
       "      <td>2</td>\n",
       "      <td>27.005131</td>\n",
       "      <td>125.0</td>\n",
       "      <td>74.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>13.3</td>\n",
       "      <td>22.7</td>\n",
       "      <td>1.10</td>\n",
       "      <td>0</td>\n",
       "      <td>82</td>\n",
       "      <td>3</td>\n",
       "      <td>89.333333</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1:左侧胫骨平台骨折 2:高血压病2级 很高危</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17355</th>\n",
       "      <td>1</td>\n",
       "      <td>29.136316</td>\n",
       "      <td>132.0</td>\n",
       "      <td>71.0</td>\n",
       "      <td>14.0</td>\n",
       "      <td>82.0</td>\n",
       "      <td>36.5</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>35.0</td>\n",
       "      <td>5.1</td>\n",
       "      <td>17.2</td>\n",
       "      <td>0.73</td>\n",
       "      <td>0</td>\n",
       "      <td>69</td>\n",
       "      <td>3</td>\n",
       "      <td>60.750000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1:1.急性胰腺炎、2.胆总管结石伴感染、3. 胃癌术后、4. 高血压、5. 糖尿病。</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17356</th>\n",
       "      <td>1</td>\n",
       "      <td>23.140496</td>\n",
       "      <td>125.0</td>\n",
       "      <td>84.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>80.0</td>\n",
       "      <td>36.2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>...</td>\n",
       "      <td>17.0</td>\n",
       "      <td>6.2</td>\n",
       "      <td>18.4</td>\n",
       "      <td>1.15</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>2</td>\n",
       "      <td>43.950000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1:双肾结石</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>17357 rows × 111 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       性别        BMI    收缩压   舒张压    呼吸     心率    体温  是否使用活性药物  急诊/择期  一般情况  \\\n",
       "0       2  25.203981  129.0  81.0  20.0  110.0  36.4         0      0   2.0   \n",
       "1       1  26.757812  123.0  79.0  20.0   74.0  36.5         0      0   1.0   \n",
       "2       1  20.213384  114.0  66.0  20.0   50.0  35.7         0      0   1.0   \n",
       "3       1  23.051755  117.0  77.0  20.0   78.0  36.5         0      0   2.0   \n",
       "4       1  15.943878  124.0  70.0  20.0   80.0  36.7         0      0   2.0   \n",
       "...    ..        ...    ...   ...   ...    ...   ...       ...    ...   ...   \n",
       "17352   2  22.558610  135.0  69.0  18.0   91.0  36.3         0      0   1.0   \n",
       "17353   1  22.761468  157.0  71.0  20.0   75.0  36.3         0      0   1.0   \n",
       "17354   2  27.005131  125.0  74.0  20.0   84.0  36.5         0      0   2.0   \n",
       "17355   1  29.136316  132.0  71.0  14.0   82.0  36.5         0      0   1.0   \n",
       "17356   1  23.140496  125.0  84.0  18.0   80.0  36.2         0      0   1.0   \n",
       "\n",
       "       ...  门冬氨酸氨基转移酶  间接胆红素  阴离子间隙  高密度脂蛋白  肺部并发症  年龄_术中  ASA分级    手术时长（分钟）  \\\n",
       "0      ...       10.0    3.5   16.6    1.41      0     67      3  230.316667   \n",
       "1      ...       54.0   13.0   16.4    0.98      0     66      3  288.216667   \n",
       "2      ...       60.0    4.5   19.2    1.15      0     70      3   70.033333   \n",
       "3      ...       36.0   17.8   19.1    1.44      0     82      2  100.000000   \n",
       "4      ...       25.0    6.8   17.0    1.56      0     73      2   34.983333   \n",
       "...    ...        ...    ...    ...     ...    ...    ...    ...         ...   \n",
       "17352  ...       14.0    7.8   14.6    1.44      0     69      2  399.333333   \n",
       "17353  ...       13.0    4.9   14.0    1.01      0     75      3  204.950000   \n",
       "17354  ...       20.0   13.3   22.7    1.10      0     82      3   89.333333   \n",
       "17355  ...       35.0    5.1   17.2    0.73      0     69      3   60.750000   \n",
       "17356  ...       17.0    6.2   18.4    1.15      0     66      2   43.950000   \n",
       "\n",
       "       出血量                                         术前诊断  \n",
       "0      NaN                                        2型糖尿病  \n",
       "1      NaN                                       胃窦恶性肿瘤  \n",
       "2      NaN                                         肝细胞癌  \n",
       "3      NaN                                          喉肿物  \n",
       "4      NaN                                       胃体恶性肿瘤  \n",
       "...    ...                                          ...  \n",
       "17352  NaN        1.左肺下叶结节：曲霉菌？ 2.支气管扩张伴感染 3.肺气肿 4.高血压病  \n",
       "17353  NaN                               1:右肾盂癌，高血压，糖尿病  \n",
       "17354  NaN                      1:左侧胫骨平台骨折 2:高血压病2级 很高危  \n",
       "17355  NaN  1:1.急性胰腺炎、2.胆总管结石伴感染、3. 胃癌术后、4. 高血压、5. 糖尿病。  \n",
       "17356  NaN                                       1:双肾结石  \n",
       "\n",
       "[17357 rows x 111 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the cleaned frame (the rendering is truncated to the first and last rows).\n",
    "# NOTE(review): the saved output contains per-patient diagnosis text; consider\n",
    "# clearing outputs before sharing this notebook.\n",
    "df_X_qs_remove"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the cleaned frame as a pickle file.\n",
    "# NOTE(review): the destination is a hard-coded absolute path on one machine.\n",
    "df_X_qs_remove.to_pickle('/home/yx/3090/project/P_prediction/Data/肺部并发症预测/data_time.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fill_value(df, strategy):\n",
    "    cols = df.columns\n",
    "    if strategy == 'mean':\n",
    "        df_mean = SimpleImputer(missing_values=np.nan, strategy='mean')\n",
    "        df = df_mean.fit_transform(df)\n",
    "    if strategy == 'median':\n",
    "        df_median = SimpleImputer(missing_values=np.nan, strategy='median')\n",
    "        df = df_median.fit_transform(df)\n",
    "    if strategy == 'most_frequent':\n",
    "        df_0 = SimpleImputer(missing_values=np.nan, strategy='most_frequent')\n",
    "        df = df_0.fit_transform(df)\n",
    "    df = pd.DataFrame(df, dtype='float')\n",
    "    df.columns = cols\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [],
   "source": [
    "def imp(model):\n",
    "    feature_importances = []\n",
    "    feature_importances.append(model.feature_importances_)\n",
    "    df_imp = pd.DataFrame(feature_importances)\n",
    "    df_imp.index = ['众数', '平均值', '中位数']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 106,
   "metadata": {},
   "outputs": [],
   "source": [
    "def select_xgboost(X, y):\n",
    "    '''Fit a default XGBClassifier and rank the features by importance.\n",
    "\n",
    "    Side effect: draws and shows a bar chart of the ten most important\n",
    "    features via ``plot_importance``.\n",
    "\n",
    "    Args:\n",
    "        X: Feature table; must expose ``.columns`` (used to label the result).\n",
    "        y: Target labels passed straight to ``fit``.\n",
    "\n",
    "    Returns:\n",
    "        Single-column DataFrame (column ``0``) of ``feature_importances_``,\n",
    "        indexed by feature name and sorted from most to least important.\n",
    "\n",
    "    NOTE(review): the plotted ranking and the returned table may be based on\n",
    "    different importance metrics; confirm against the xgboost defaults.\n",
    "    '''\n",
    "    classifier = XGBClassifier().fit(X, y)\n",
    "    plot_importance(classifier, max_num_features=10, grid=False)\n",
    "    pyplot.show()\n",
    "    importances = pd.DataFrame(classifier.feature_importances_, index=X.columns)\n",
    "    return importances.sort_values(by=0, ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 107,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/yx/.conda/envs/P_prediction/lib/python3.6/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[21:03:03] WARNING: ../src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAdkAAAERCAYAAAA3/N3bAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAA+eElEQVR4nO3deXxV1dX/8c+XKYCYKI2oBStaHCoSpEaUAooEUJtQq2mtgrPW2jo9KEWoto9VW21F64Da4lBR9GeLrQpxAk1QEadYBbQVpYIDj1oJmjILuH5/7B29hJvkBnIysd6vV145wz7nrHPhZt29z7lnycxwzjnnXMNr09QBOOecc62VJ1nnnHMuIZ5knXPOuYR4knXOOecS4knWOeecS4gnWeeccy4hnmSda0UknSppnaRlKT/jmjqu+pI0UNLDTR2Hc1tL/j1Z51oPSacCR5rZ8Q20PzMzNcS+WgJJdwGzzeyuJg7FtRLek3XOOecS4knWuW2EpNMkLZK0VNKPU5b/StKHkj6QdFJcNlHSsji9TNLCON1T0pKUbS+TdFnK9OWSfi+pQlJWXN5f0j8kfSRpsqQ6e8aShkianTJ/qqTnJL0l6W5Jz0p6XVJ7SSbpLkn/kfQ3SdvFbfaU9LSkTyRNTVl+amw/Np73PpIOjOd7PHBTPOcRsf3ekl6K+39KUk5cPlvSeZIWSFou6YS4vHPc/yeS5kn6dlzeRdI9kv5PUrmkvbfwn9K1IJ5knWt9jo4J7SNJlwNI6g1cBOQDBwCXSdpZ0jeAIcDewCHANQBmNtbMcuN0rpntk+GxzwAqgX3NbJ2kDsC9wKlAD2BP4PtbeF7bAz8iJMLRQDdg17hufsr0efH33cD9wC7ABuBXKfsaAewBfBtYZGavxPO9HzgvnvPM2PZk4HYz6wYsiseu8mNgGHAOMCEuuyT+3gWYCNwU538JvA90B24mvtaudWvX1AE45xrcw2muyQ4lJJU343wnYB8ze0bSBcAYQrLduZ7HEpB6Y8cCM/tNyvw+QE+gKmF1APYDHqzncQBeBT4FPjSz9ySt5quOwu1mtlHS/wNOlLQ9IYEONjOTdAMh6V4c239GSKZfZHDcK4DjJN0OjAQ+TFl3k5l9LOkFIDsuOxL4mZltBO6RdF9cPgzYHTid8LpV1PcFcC2P92Sd2zYIuNvMdjGzXQh/7F+SNAj4O/A2cNIW7Ld7tfkX0hx3Ucpxe/BVz66+rNrv6seB8DctXeK0lDYAL2WYYAEeBvoDdwG3VVv37xpiEoCktsBPUpYVxtdhF+DwDI/vWjBPss5tG8qAQklfl7QD8Bqhl3kI8A/gL8AP0mxXIWmPeO1zB+C/QG687tgDKK7juAuB7eI11rZ8NXTc0M6K+z8BmGtmKwg93x9LagOcDzyawX6WEXr8SNopLhsE3AosYfOh7nSJeibw0xjPUXyVZJ+K8bQFjgEey+jMXIvmSda5bYCZLQB+DTwP/JMwzDkPeIAwfPt/hGHdldVuyBkHPAd8BPQxs+XAn4G5hB7pfdTCzNYBxwHXE4ZZVwJ/bKjzStE97n8jcEtcdhJwIvAxYZj61xns52bge5KWA/8bl/0emANMJyTuum5YupLQa/0QuBw4JS6/nDBM/yHh+vAZGcTjWjj/nqxzrkXb1r7L61oW78k655xzCfGerHPOOZcQ78k655xzCfEk65xzziXEH0bhvpSbm2s9e/Zs6jCcc65FeeWVV5aZ2U7p1nmSdV/q2bMn5eXlTR2Gc861KJLerWmdDxc755xzCfEk65xzziXEk6xzzjmXEE+yzjnnXEI8yTrnnHMJ8STrnHPOJcSTrHPOOZcQT7LOOedcQjzJOueccwnxJOucc84lxJOsc845lxBPss4551xCPMk655xzCfEk65xzziXEk6xzzjmXkBabZCW1kdRRUjdJe0saLOlkSTdK2rmp43POOde01q9fz8iRIwHYsGEDP/zhDxk4cCCn
n376Ju3+8Ic/MGzYsBr3s2zZMgYPHkyfPn0YP358vWJoyUXbpwG9gOVADrACeBz4B9CxemNJ5wHbAQ8B55nZOXUdQNIdwCQzezVl2QDgZDP7aZzvAkwHTjCzj+vYXwcg28yWVVt+FLAMWA0cbWa/rba+OzDZzApTlj0A/MLM3kpZ9nVgtZl9Vte5pbNgaSU9xz+yJZs651yTW3L1l38iWbNmDQcffDBvvRX+RD700EP07duXadOmcdRRR/Haa69xwAEH8O6773LXXXex00471bjf66+/nsLCQsaNG0e/fv04/fTT2XvvvTOKqcX2ZM2s2Mz6mtnhwI3ADDO7yszuMrN0VerXA6vM7E3gfUk7AEhqK6lS0mxJn8VEmLrN52n2szFumwX8FfhbBgl2B+B4oKLa8s7A1cDHwJvAcZJ2qbb5OcCOkq6V1EVSLuHfbjtJuZLaxnYfAiMl7VpbLM4519p16tSJ+fPn06NHDwCOPPJILrzwQjZs2MBnn31GdnY2ABdccAFXXXVVrfsqLS1l+PDhtGnThsMOO4yysrKM42ixPVlJBwM3EZJgN6C9pO8DHYDzgCVACbAKENAzblccd3GspFHAv4EFZjZE0mxgtKT9434PBC6SNMnM/lHt+F2Ah4HpZnZzBiGfB1xtZlZt+U3ADWb2Xtzv1cDdkkaa2TpJecAQ4Jh4HtcCnYH9gJ8ChwJFwCIzM0n3Ar+VNCHNsTYj6SzgLIC22TV/knPOuZasS5cuABx88MHsuuuu7Lnnntx333307duX/fbbr9ZtKyoqyMnJASA7O5vly5dnfNwWm2TN7EWgP4CkU4FcM5tYrdlBcf1xwK8Iw8UXpkmY+8cEewBwOLCbmb0naXvCcPGbKb3FquOvlPRrM3sm7qMNIDPbWD3WOIy71szWpyxrD1wHtAf+nLLf+yUdBJRKOiPG9CIwhzDUvR64ARgL/BbIBjakbP+FpMVAP8LQea3MbDIwGSBr173qTMrOOdcSVVRU0KVLF+bOncvQoUMpKyujpKSE9957jyeeeIKFCxcyadIkzj333M22zc3NpbKyEoDKykp23333jI/bIpOspKHANYSEA7AT0E7SD+J8O+AyMyuJye9sYCqwK6HXdna1Xc5L6cl2Bh6JiS7VQOBKQqLeJfZ2D5D0WlzfJsY0I03IPQhDual2B7KAcmCepGxC7zsHeIlwzflD4H7gVeDgeA4/rvGF+cq7wB5kkGRT9emeQ3nKNQ3nnGstrr32Wvbbbz9OPPFEOnfuzJo1a7jvvvsAWLJkCWeeeWbaBAtQUFDAzJkz6devH08//TQXXHBBxsdtkUnWzEoJQ7lArT1ZgAuBWcBnwEpggKQCM3sqpc2X16bNbJWkO4G8asd8BjhUUj5wqpmdK2kqYQj49TpCXgIMrra/RcRhWkmVQDszu0PSMcC3zOz6uG5H4EfALsBxcfObCUl6KuHmr+p2p54J1jnnWrNzzjmHk046iZtvvplvfvObHHHEEWnbzZ07l7lz5zJ27Ngvl51//vkcc8wx3HvvvYwcOZJevdL92U1PGVy2a/ZqSrKSRgMnA98l9ADbAfcCZcDtwB3AWkICfpUwNPu1qiFfSZOIw8Up+8wHTgXGE4ajryXcEfx+HTFOIFx7XZ1m3VPAj83snXgX9Gdmdk+1Nk8SkvJ3gGeA38cYCoCSqhuv4o1b1xHuoK7XP25+fr6Vl5fXZxPnnNvmSXrFzPLTrWuxdxdXkfQ1wnDsF9WW7wWcABSnXic1s08JiakfYfi4DfHGJ+A31Xef5pDtCT3FJwjD1b8Anpb023jttSY3AKdJ+vLrRZJ2kPRn4PmYYLsREveSNOfYD7jCzKZW3SQVz+eOlATbnvAB4LL6JljnnHMNr8UnWeB04BTgydSFZva2mRWZ2cq4qH38wcwqzOwMM3snJuBRcfk1Kb3YMwl37v6n2vG+CXwDOM3M5pjZ48AgwIDKmoKMPdhbCdd0q1wLvGhml8b5CYSvB71U
bfNOwCTgxJRl7QjXdFNlA3dU/x6uc865ptEqhouTEL+/utbMqveQ2wFZZraqaSJLjg8XO+dc/dU2XNwib3xqDOmuncblG0j5yoxzzjlXk9YwXOycc841S55knXPOuYR4knXOOecS4knWOeecS4gnWeeccy4hnmSdc865hHiSdc455xKSyPdkY+WbDoQnEO0A7EyoCpMP/KauAudJkrQvoYbsBqBN6iMXJbWL34Otmm+brnRdBsdo8POXJKBza3wIhnPOtVZJ9WSnEWqg/oVQqu1KoDuhMkzH6o0lnSdpvKR9Jd1cbd25kv4laXa1n8rqNV6rbXespFPSrDoZuBjYE3hU0qOSPpb0aIw71b2ShsT99ZZ0fxLnH/df42sQjSA+W1lSG0k/SPnZPmU/v5WUG6fPTCn/55xzGVm/fj0jR46scd7MOOWUUzjkkEP43ve+x4YN6Z/Ps3btWoqKiujbty8nnXQS2+ITBhPpyZpZcdV0HWXoqqwHVsXi6O9L2sHMPovrNgBXAfea2UZJOxE+HExLec7wDsCbwCfA3wlJ7RxggaSy1AfqA1cA/2Nm/5b0QzP7r6QSMyuS9GUlXkk7Ex7K/2xctIpQsSeJ86/xNZDUG7iFUAChTax5Owm4NP78AXg4HusbwDHA/8be9GxgNPBA7Amr+mMiUy1YWknP8Y9kcorOuVZkSUod6TVr1nDwwQfz1ltvpZ0HeO6559iwYQMvvPACQ4YMYebMmXz3u9/dbL9Tp06lR48elJSUUFRUxKxZsxgxYkTyJ9SMJDVcfDBwE/A50A1oL+n7hCHU8whVZkoIiUtAz7hdVXI6VtKoWHO1PeFB+E/HfZwGrGPTqjvrgMeBfxKq41xBSERPAXdJutzMXpPUxszWAFfFyjb3AUfEY+8A/JVQHB3gEkJv9FZJ+8QYesYk1x640swe25LzN7MXYxLP5DV4Q9IZwP7A28CBZvaApNOA14CXzKyqeP3/ANOBHxKq8XwR9/k44YPJRcCCdDE75xxAp06dmD9//pc1U6vPA+y8885fFi7v0KFDjfsqLS2luDj8SRs6dChlZWWeZBuCmb0I9Idae3IHxfXHAb8iVKe50MyqFxvvSKiEcxlhGLYNYei0OKVN1RjEAYRz+jah7uoFQDlwo6RbgCxJ5wIzzewSSf+V1DNuexzw5xjTwfEYL5hZVWH1noQScqc2xPnH67J1vgbxuL0IFYGGAW9L2i+ecyHwaGy3N3ASoaD7/UDbqpq0kn4N3BM/tGxC0lnE4vFts3eq69Scc4699toLgAcffJDPP/+8xgLoFRUV5OTkAJCdnc3ChQsbLcbmosGTrKShwDWE4U+AnYB2KdcG2xGSVUkc0jwbmEqo7XpWnE+1I/CGmT0p6Rxgo5mtk/RFmhuTuhF6dzcBfYCFhNqvNwEfmtnnkt4Aqi4ujAWqbkIqTZkW8BNC/dduhB5uR77qyU41s9u39vxj+7peg10I9XLnp8T2jTi9BDieUIj+GOByQtH5L+L12OfM7B1CibzfpovXzCYDkwGydt1r27tg4pzbItOnT+eGG25gxowZtG2b/vaY3NxcKitDBdDKykpyc3MbM8RmocGTrJmVAgdWzddxTfJCYBbwGbASGCCpwMyeSmmTB9wm6XxgNdBZ0iDgQ0LyeTel7V3AboSE9QAhQR0PXGJmn6eGKWkUcAYhGX6bMLycJWmimT2S0sNtB3xkZsfH8xlCKPreEOdf52tgZi9IOprQM7a4bJikc83sCUnflnScmf0uvi7D4n7vAU6QdB9hSHldTTFX6dM9h/KUazPOOZfORx99xDXXXMPjjz/OdtttV2O7goICZs6cSXFxMaWlpYwZM6YRo2wemux7spJGA8OB36csPhe4Nt5R3ElSe8JdwAcCAwnXYy+N7f4NfFdSVVJpQ+jh7U5IWG8BOYSbfd6udvjdgc/MrMDMjiQkoSPN7HAzq37nz3o2V++v9aSTyWsQl/UEis1sWOqxJXUlXI/e
nc09CrwOHAmkvXbsnHNbYsqUKXz44YccccQRDBo0iDvvvJPFixczduzYTdqNHj2apUuXkpeXR9euXSkoqLF/0molWk823lzUg9ADTV2+F3ACIXFsDDe+gpl9KqmAkHR2JdyUNDPe6PM3C/d/vyHpBMKdv88AE4DnCDf5VAK3E5LrUsKdyd2qDSvvRrhe+8s6wq/6ANIWGBqHiSF873X61px/PV+Dd+K5PSRpQ9wfhGHj8XH+3JRlSJrCV4l3L+BDSacDd5vZnZnE7pzbti1atKjG+YsvvpiLL754s20mTtx0wC4rK4uSkpJkAmwhki7afjpwCpvepETsWRalLGpfFYuZVRCGcZH0CPG7q5byBas4/Y/Yi/0H0Dkuv1nSdUBvvroZCmBwyvHmAUeZ2Qcp67ukib1TjKk9UFptuDjT2+PSnn+MNaPXIGXd9+NXep6pWmZm49LE3AE4I/WhGlVUy/eKnXPONTxti18Oro94Y9J2ZrYizbo51RatM7MGHw+JD5tYaQn/Y+Xn51t5eXmSh3DOuVZH0itmlp9uXdI92RYvPrxhswQb1w1qpBjSHt8551zz5gUCnHPOuYR4knXOOecS4knWOeecS4gnWeeccy4hnmSdc865hHiSdc455xLiSdY555xLiCdZ55xzLiGeZJ1zzrmEeJJtoSRdJmmhpGclPSHpOkkvx3VXpxQ0QNJmxdqdc1tv1apVHH300QwcOJBx48axePFiDjnkEAYMGMCdd9Zci2PZsmUMHjyYPn36MH78+EaM2DU2f3ZxCyXpMmCRmU2VNAG4AMgmFDt4BOhkZkNi20Vm1quufWbtupftesr1icXsXGuwJKXm8uTJk6moqGDChAkUFhbSuXNn+vfvz0UXXcRBBx3Es88+S+fOnTfbx6WXXkqXLl0YN24c/fr1Y9q0aey9996NeRquAdX27GLvybYOOwFrgPcIpe2ymjYc57YNWVlZrF69GjNj7dq1AKxYsYIvvviCtWvXsnDhwrTblZaWMnz4cNq0acNhhx1GWVlZY4btGpEn2ZbtkjhEPBJ4GHgFKCDU0s2IpLMklUsq37i6MqEwnWudRo0axWOPPca3vvUt9t13XyZOnMjcuXM57bTT2HHHHVmzZk3a7SoqKsjJyQEgOzub5cuXN2bYrhF5km3ZfmNmBwE3A9sTkuyphBq7GTGzyWaWb2b5bTvnJBOlc63UVVddxdlnn82bb77J8uXLWbJkCQ888AD33HMP69ato1u3bmm3y83NpbIyfKitrKwkNze3McN2jchL3bUOlYQk+zrQF/g5cHR9d9Knew7lKdebnHO1W7FiBR07dgTC0PG8efOYPn0648ePZ+XKlXzzm99Mu11BQQEzZ86kX79+PP3001xwwQWNGbZrRN6Tbdl+Kel54GdAB+BzYAHwapNG5dw24pxzzuHWW29lwIABrFmzhtGjR/PSSy9RVFTEjTfeiCTmzp3LxIkTN9nu/PPP59FHHyUvL4/CwkJ69arzvkTXQvndxe5L+fn5Vl5e3tRhOOdci+J3FzvnnHNNwJOsc845lxBPss4551xCPMk655xzCfEk65xzziXEk6xzzjmXEE+yzjnnXEI8yTrnnHMJ8STrnHPOJcSTrHPOOZcQT7ItgKQOTR2Dc865+vMk20xIOkrSZTWsvk3SI5JKYu3X1+J0iaTlkrrUY1/ObbNWrVrF0UcfzcCBAxk3bhzLli1j8ODB9OnTh/Hjx9e4XabtnKvOCwQ0E5KuBD40s5trWD88TuYAXczsLkndgEIz+3N99lWTrF33sl1Pub7+wTvXjC1JKd84efJkKioqmDBhAoWFhXTs2JGDDjqIcePG0a9fP6ZNm8bee++92T4uvfRSunTpUmc7t23yAgHNmKSbJC0AhgAnSJoTf16XdE1ssxtwG7BP3Ox/JJUAU4EX6rMv57ZlWVlZrF69GjNj7dq1zJ07l+HDh9OmTRsOO+wwysrK0m5XWlqaUTvnqvOi7U1vFTDG
zJ5MXSjpSGCApJ2BO4EzgY+AkcAjwIuEGrL9JXUws3l17SvdwSWdBZwF0DZ7p4Y8L+eanVGjRjFgwACmTZtGQUEBH3zwATk5OQBkZ2ezfPnytNtVVFRk1M656jzJNr0NwM2SVlRbng3cZ2YfSzoR+EtcvhPQFhgY56+LCbbOfaU7uJlNBiZDGC7eqjNxrpm76qqrOPvssznzzDM54YQTeOutt6isrASgsrKS3XffPe12ubm5GbVzrjpPss3DOTX0Pg+Jsx2AN4ALgGOBLsDdcTq3nvuqUZ/uOZSnXL9yrrVZsWIFHTt2BMLQ8YABA5g5cyb9+vXj6aef5oILLki7XUFBQUbtnKvOr8m2DBuBHsB44BigME6fDcxvwrica1HOOeccbr31VgYMGMCaNWt48MEHefTRR8nLy6OwsJBevXoxd+5cJk6cuMl2559//mbtnMuE313cxCT9BvgeUFlt1Q7A38zsfyXdBnyN0KPtARjwb+CbwD+B08zs80z2VVss+fn5Vl5evnUn5Jxz25ja7i724eKmtx0136w0LM7eAawAlgGfmtnnKe1GEJJupvtyzjnXSLwn28QkZQEbzGxjU+/Le7LOOVd/3pNtxsxsXXPcl3POua3nNz4555xzCfEk65xzziXEk6xzzjmXEE+yzjnnXEI8yTrnnHMJ8STrnHPOJSSjJCvpZEnFko6R9LakCUkH5pxzzrV0mfZkzwMeBE4F+gLHJRWQc84511pkmmQ3AkcDK4EsQkk11wxJypVUZ8Ud55xzycs0yV4IHA78ChgVf7vm6RRgMICkiyQ9mfLzcBPH5lyTmD17NoMGDWLQoEHstttuTJkyhSFDhjBw4ECuuOKKGrdbu3YtRUVF9O3bl5NOOgl/DK2rr4weq2hmc4G5knKAO81sTbJhuS0hqQ1wMlAhKZdQrP14YJKZHS/pb7Vtv2BpJT3HP9IIkTqXvCUptZGHDBnCnDlzACgsLOSzzz6jd+/e3HzzzRx55JEsXryYPfbYY7N9TJ06lR49elBSUkJRURGzZs1ixIgRjXYOruXL9ManEyW9DjwP/FjSNcmG5bbQKcDdZjbUzC4mDPOn8o/hbpu2evVqFi1aRLt27VixYgVmhpnx2muvpW1fWlrK8OHDARg6dChlZWWNGK1rDTIdLj4f6Ad8bGY3AoclF5LbCi8CgyW9JemXcdlfgMMkPQ7kVd9A0lmSyiWVb1xdvQytc63LrFmzKCgoYPTo0Xz22WcUFxeTlZXFmjXpB+cqKirIyckBIDs7m+XLlzdmuK4VyDTJrgIGAEjanVDb1DU//wEeAuYCT8RlPwKeNrMjgfnVNzCzyWaWb2b5bTvnNFqgzjWFGTNmUFRUBMAdd9zB3//+d7KysujWrVva9rm5uVRWhg+flZWV5ObmNlqsrnXItNTdWcDvgW7AH4CfJRaR2xptgfbxd4c4/TdgH0lPAnvWtnGf7jmUp1zHcq41MTPKysqYNGkSM2fOZMqUKdx3333MmzePQw5Jf0N+QUEBM2fOpLi4mNLSUsaMGdPIUbuWLqOerJm9bWbHmFlvMzvWzBYmHZirPzP7GHgWeBfoCDwKHAXMNrNhhGu2zm2TXn75ZXr37k3Hjh056qijWLt2LYMHD+bSSy+lS5cuLF68mLFjx26yzejRo1m6dCl5eXl07dqVgoKCJoretVTK5JZ0Sbeb2ZmNEI/bSpIOAmYBY83s9rjsL2b2o7q2zc/Pt/Ly8qRDdM65VkXSK2aWn25dptdkLf7xds3fB8BpVQk2ym6qYJxzbluW6TXZTsCTkp4g3ARlZnZ6cmG5LWVmHxIegZm67KgmCsc557ZpmSbZS+KPc8455zKUaZLdPc2ydxsyEOecc661yTTJHh5/dwKGA28DzyQSkXPOOddKZPrs4l9XTUu6BLg5sYicc865ViKjJCvpGymzXYC9kgnHOeecaz0yHS7+dcr0OuDKBGJxzjnnWpVMh4tPS52XlJVMOM4551zrkWmp
u2urLfKbnpxzzrk61NqTlZQN7AgMSrku2wX4IunAnHPOuZauruHiw4HvA98ALgMErAa2+VIUkjoBu5jZ4qaOxTnnXPNU63CxmT0cr8fON7PTzew0MzvHzF5opPiaswHA+LoaSRov6ewa1g2UdF2c7ihpL0nDJP0ydRtJ/SRdXm3bKZL2qrZsgqRBkq6SdLGkHSXdLan7Fp2hcy3I7NmzGTRoEIMGDWK33XZjypQp/P73v2fw4MEcddRRfP7552m3W7ZsGYMHD6ZPnz6MH1/nW9q5esn0xqcjUucl7WJmHyUTUvMmaSjwC2DXMKsnU1aPN7PqZWzWxp901gFV7/wHCeXplgIVwLKUdvOBP0m6JeV1Xx9/quIS8AqhGMB6wojDf4E+wK6Svg68X9u/24KllfQc/0hNq51rdpak1D8eMmQIc+bMAaCwsJB+/fpx7bXX8uyzz3LjjTfywQcfsOeem5dUvv766yksLGTcuHH069eP008/nb333rvRzsG1bpl+T/YK4Ghgu7hoJdA3qaCaua8BUwmvQRcgFygH8oHtqxpJupqU5z3HJDfSzP4U5wcCFwE7S7oQWEO41t0R6A58WbPXzDZKOtTMakrWEAqy/xw4GPgIeB/IAT4DvgucQKgnu01+OHLbjtWrV7No0SJefPFFPv30Uw499FB23nlnzjvvvLTtS0tLuemmm2jTpg2HHXYYZWVlnmRdg8m01N1hwHeAF4A84OPEImr+aivAawCS+gB5ZrYxZd0nwHlVQ7xm9hzh8ZQLgPtjm+8CxYSEuWPc1zGSngX+IKmvpOckzQYKgfslPS3pp4RnST8FTI8/a4GvAw8AtwIvmtlL1QOWdJakcknlG1dX1v/VcK6ZmTVrFgUFBXzyySfstNNOPPPMM3zwwQdf9nKrq6ioICcnB4Ds7GyWL1/emOG6Vi7Th1FsAA4g9GTzCD0tV7PTgZtSF5jZekmXAb8FfiipAzCS0NO8BugAnAj0JNSE/TBuOsPMHpT0tJnNAwYCSLoduNLMlsT5nYHX435KCL3iI4D9gecJw9CbMbPJwGSArF33qu0DhHMtwowZMzj22GN555132GeffQDYc889Wbo07VuA3NxcKivDB8zKykp23z1dPRTntkymSXYUsBvwK2As4U5jl0YcFh4KXJhm9YPA7yTtH9s8RajP+xAhKZcAjxKS7MkAZrYhbltXAiwGTiLcCT4EWG1mRTEZfx94ta7Y+3TPoTzlGpdzLY2ZUVZWxqRJk9hxxx257rrrAFi0aFHa67EABQUFzJw5k379+vH0009zwQUXNGbIrpXLaLg43iyzhnCzz++Ax5IMqoVbC/zEzDZLinH4eCRhmHgmcG9c9RrwhZlVEoaPMbN11beXNFFSl3QHNbNbgP8HPEFI5tPiqnuBn+D/Zm4b8PLLL9O7d286duzIgAEDyM3N5aCDDmKfffahf//+zJ07l4kTJ26yzfnnn8+jjz5KXl4ehYWF9OrVq4mid62R0uSCzRtJNxGu7+1B6MX+2MxGJhta8yNpFHAxoVe5EWgLtAdWEG56WgPcYmZ3xva9gHHAbDO7L83+DgJ+CLxHGCn4MyEprgT+Cvwx3vSUTbime7mZ/SZuu8lwcVymeLzjCEP8PwSuI1zj/ZWZldR2fvn5+VZeXv3maOecc7WR9IqZ5adbl+lw8QFmNlhSqZlNl3RxA8bXYsREuVmyrMXuhIRZUy8yi3Cde+e43wcIQ/PvxPl34rY7ANdVJdiUbb98hrSkQwh3M88B+gP7AjMIXzd6HnhE0idm9mI94nfOObcVMu3JTiMMYx4DXA8UmtlxyYa27ZHULuUabH23bQ/hBquUZZ3MbE2cbmNmtT4O03uyzjlXf7X1ZGu9Jivp3Dh5MlBJ6BHlAKc2ZIAu2NIEG7ddn5pg47I1KdP+vGnnnGtkdQ0XHwdMMrM1kvYys581RlDOOedca5DpwygA9kssCuecc64Vqqsn2y3eUStglzgNfHkTkHPO
OedqUFeS/X9AVaWXv6RM+5OBnHPOuTrUmmTN7NeNFYhzzjnX2tTnmqxzzjnn6sGTrHPOOZcQT7LOOedcQjzJNiBJPerZvlNSsTjnnGt6nmQbiKTBwEOS0r6mknIljZY0OmXx3yT9oJ7HOU/SeEn7Srq5lnbnx2LuzrVIs2fPZtCgQQwaNIjddtuNKVOmsH79ekaOrL02ydq1aykqKqJv376cdNJJZPLoWOeSkmmBAFcLSVnAzcB64O+xh7qGUKFnmZmdAtxOqLDzS0l7ArcSXv9jJf0tXWm8GqwHVpnZm5Lel7SDmX2Wpt0G4PP6nMeCpZX0HP9IfTZxrkEtSalnPGTIEObMmQNAYWEhBxxwAAceeCBvvfVWrfuYOnUqPXr0oKSkhKKiImbNmsWIESMSjdu5mniSbRiTgGfN7BwASbPN7IiqlZJyCQ/06ALcCDwMTAB+Tyhxd4ukMWa2tvqOJe1MKOa+Ku6jZ1xeHJscK2mUmS2SNBE4HPgU6A58IekEYEfgZTM7u8HP3LmErV69mkWLFtG3b1/mz59fZ73X0tJSiovD22Po0KGUlZV5knVNxpPsVpK0HaFCUY6kRwgP6ugj6VFCT/b3hNqzs4GDgApCgu1GKEf3EfBvYCjwaPX9m9nHcTskHQf8ilAe70Iz+0e15huBi8xstqSzgbVmdpekIUDaYWlJZwFnAbTN3mmLXgPnkjRr1iwKCgoybl9RUUFOTg4A2dnZLFy4MKnQnKuTX5PdSma2ysxuJBRGP8fMioAFZvZdMxtuZrOA/wBfA7IJxdizgKlxOcArZrZZgk0Vr/WeHbebTkyM1cPZgvgnm1m+meW37ZxT382dS9yMGTMoKirKuH1ubi6VlZUAVFZWkpubm1RoztXJe7JbSZLi9dTaSskZ4bXuAwwnDBuvJPQ8C4F1QFkdh7oQmAV8FrcdIKnAzJ5KadMGuFZS6nDxicD2wCt1nUuf7jmUp1wTc66pmRllZWVMmjQp420KCgqYOXMmxcXFlJaWMmbMmAQjdK523pPdeiMkPQl8HZgkqYQwXFwSf2YChxKS6e2EoV4jPAd6f+BA4IraDhDvSB5OGHquci4hoZ6b8lWgWcCxZjYMuAG4Jk5/D3ioQc7WuUb08ssv07t3bzp27Jh2/eLFixk7duwmy0aPHs3SpUvJy8uja9eu9Rpqdq6hyW9vb3iSnjazw1Lm9wd+DbxEuNEpmzDc+yCwxMxq/KqNpL2APwDHm9nKeK21nZlNkvQ1QuL9jZm9U227nwJrzOyuTOPOz8+38vLyTJs755wDJL1iZvnp1vlwcTKqP2TiI+Ae4K04fbOZrZX0I+AZST3M7IN0OzKzt4HUC1Ltif9uZlYBnFFDDF0Iw9HOOeeaiPdkm5iktma2MU7PqbZ6nZk12liX92Sdc67+vCfbjFUl2Dg9qCljcc4517D8xifnnHMuIZ5knXPOuYR4knXOOecS4knWOeecS4gnWeeccy4hnmSdc865hHiSdc455xLiSdY555xLiCfZZkRSoSR/IIVzwOzZsxk0aBCDBg1it912Y8qUKaxfv56RI0fWut3atWspKiqib9++nHTSSfhT7VxT8iS7hSQdJemyGtZlSxqVwT5elfSkpEVx0fmked6wpH6SLq+2bEosHpC6bIKkQZKuknSxpB0l3S2pe8Yn5lwzMWTIEObMmcOcOXPIy8vjgAMO4MADD2TWrFm1bjd16lR69OjBvHnz+PTTT+ts71yS/LGKW24g8GEN60YBqwEkPQG0BfoC8+LvHma2Dvg/MyuU9IikbsBhwBWSqvZzgZm9AcwH/iTpFjP7KK5bH3+IxxGhZmx2XL4a+C+hhu2ukr4OvJ+y/WYWLK2k5/hH6vkyONdwlqSpZ7x69WoWLVpE3759mT9/Pr169ap1H6WlpRQXFwMwdOhQysrKGDFiRCLxOlcXT7L1JOkmYAhQGedPiKt2AB4jlJ47BLhE0jXAejM7QlKJmRXF3+viNtvFourZ
hKLs55nZbXG/lxMq7mBmGyUdamZrawltT+DnwMGESj/vAzmEIu/fBU4ATonrUs/nLELZPdpm77QlL4lziZo1a1a9asJWVFSQk5MDQHZ2NgsXLkwqNOfq5Em2/lYBY8zsydSFko4EBgDHA1nADEJSu6qO/a0EviAkye9JOhWYTvi3WSPpGEICfl3SH4FbCD3VfYD9Ja0D7gduA54CPiYk0m8RCsk/APwV2NPMXqp+cDObDEwGyNp1L7945ZqdGTNmcOyxx2bcPjc3l8rKSgAqKyvJzc1NKjTn6uRJtv42ADdLWlFteTZwn5ndJOl1IM/MFkhC0pNA36rfKdt8AXxASJrjgHOB2/kqUa8FZpjZg7EQ/DzCMDWSbgeuNLMlcX5n4HWgA1AS930EsD/wPLC0rhPr0z2H8jTDdc41FTOjrKyMSZMmZbxNQUEBM2fOpLi4mNLSUsaMGZNghM7Vzm982jLnmFl+6g/hpiUk9QamAN0lnQxgZsOAF6t+p+ynPdCLUGAd4EfA3YSE3Qv4r5ltiOvq6mUWA5cAPwEmEhLw7+Ixvg+8uhXn61yTePnll+nduzcdO3ZMu37x4sWMHTt2k2WjR49m6dKl5OXl0bVr13oNNTvX0Lwn2/AWEXqbHxCS53HpGknqCCwzs/slnRkX/8XMxkraF/iZmX1abZuJwGVmtrL6/szsFkntgAMICfW/cdW9hOHkPbf6zJxrZP3792f69OmbLFu0aNGX03vssQcTJ07cZH1WVhYlJSWNEp9zdfEkW38C/iCpstryHYC/EYZozyAMKz8O1DRcPJhw1zCE3qYRerCY2ZuSFko6zsz+KimbcL13VroEm+ImwrDzqcAGSWXAOYSh4sMJw8jOOecaiSfZ+tuOmm98Gga8AZxrZu/H5cVxmLiqXYmkLCAfeEDSrcAcQs+3o6TZselK4Ow4vQNwnZn9JuWQWfGnar+HEIaL5wD9gX0JN1/9gnBN9hFJn5hZ6nC1c865BMmfhlI/MUFuMLPNHhrRlCRVfd0n9buzncxsTZxuY2Zf1LaP/Px8Ky8vTzZQ55xrZSS9Eu/N2Yz3ZOsp5TuuzUpqck1ZtiZlutYE65xzruH53cXOOedcQjzJOueccwnxJOucc84lxJOsc845lxBPss4551xCPMk655xzCfEk65xzziVkm0mykrqmm66lffonkm/ZsQslDWqo/TnnnGsZtokkK2lv4GlJuZI6AM9VS7rtJT1bbbN7Je1fbT/ZkkZlcLxXJT0pqepJ5ucDG6u16VBbIpfUruopTinLjpJ0kKTekn5RbZ1igYCq+dMljUhZ116S6ordueZi9uzZDBo0iEGDBrHbbrsxZcoU1q9fz8iRI2vdbu3atRQVFdG3b19OOukk/Kl2rim1+ic+xWT6HcLD+o8mFDv/gFAgfa6ZvWVm6yVtjO3vAHIIhQAuq0rKsWzcKGB1bPcE0JbwwP958XeP+ESo/zOzQkmPSOoGHAZckZLjLgB2B34hqaqU3V5AJfCfON8GuBp4NB6vc5wfSXjg/72S7jSzj2L7HsDfJK0hFBvoDiyNyViEOrPHAe/X9FotWFpJz/GP1OPVda5hLUmpZzxkyBDmzJkDQGFhIQcccAAHHnggb731Vq37mDp1Kj169KCkpISioiJmzZrFiBEjEo3buZq0+iQL5BKKoF8NDCcUM/8NcCShk7cTcD2wp6T7zex4SccAXYG/AyuAjbHdIcAlkq4B1pvZEZJKzKwo/q565OJ2kk4kVNW5EDjPzG4jHPByoL2ZPUpMoHH5k8BFsTB7OjcBN5jZe7H91cDdkkaa2Toze1/STYQPEEb4QPBonM8HVlUVLXCuJVm9ejWLFi2ib9++zJ8/n169etXavrS0lOLiYgCGDh1KWVmZJ1nXZLaF4eL1QB5wKaEnOzJOjwTMzJ4DxgILgNGS9gJOBP4C/AkYbmG86XhC1ZsZhMLqdVlJSOh7AmMkPSfpYsIHmy+fKRxL2s0BOgI3S5oj
6fWU9e1j8mwP/LlquZndH2MujfVnIdSN/Q9wFqE3fgjwL6DSzO5JF6SksySVSyrfuLp69T7nmt6sWbPqVXi9oqKCnJwcALKzs1m+fHlSoTlXp22hJ9uOUP7tQuCYOD8N+CFhGBVCUtqX0FtcTUiM/yIk1CWSZGY3xeSXZ2YLJNVUJxZCcv2AkODHAecCt/NVol6b0vZTM9vkpihJqeXodo/blAPzYm3ZJYQk+lI8lw9j21ExjqXAHsDOhPJ3XSR9y8x+Vf3FMbPJwGSArF338otXrtmZMWMGxx57bMbtc3NzqawMHxgrKyvJzc1NKjTn6rQtJNlPgWeA7xOGTdsQkt964CVJhxOGV9+Mbf8XOBT4CXAtoUc7TFJ3YApwv6STAcxsWOpwccox2wO9gC5x/kdxn3OAbwL/TWnbJfZkU32Z7MxsEeFDALFQfDszuyMOaX/LzK5P2e4TM7tQ0lHAUOAFYIWZzZS0W10vVJ/uOZSnXBNzrqmZGWVlZUyaNCnjbQoKCpg5cybFxcWUlpYyZsyYBCN0rnaterhY0neAqYTrr0cSho37pMxfB3wduChuciUhsZ4A/Bx4kFCA/TNgETAQuDgur+mYHYFlcTh3ZVz8FzPrT+gxvmtmn6ZsstLMBqX+1HJKJwNlcboHKTcxxXMdK+khwgeEEfG8fhaX3SYpbb1D55qrl19+md69e9OxY/ob8RcvXszYsWM3WTZ69GiWLl1KXl4eXbt2rddQs3MNbZsq2h57oG3N7M9p1s02syGS+hKGaC8BHgKK4s9g4AxgA1/dqdyRlLuLzWwnScOBQWb2v5KeJiTGS8ysqjd6DqHH+dc4/0827dkCtIlJuSq2HYA/AEvN7NJ4x/JE4DYzeza2aUO4xmxx/lxgkZk9nunr40XbnXOu/rxo+1fakeac4/dR20vqQuj5zgO+Z2YfS/oQOBh4g9CrfT9uU2xmw1L2USIpizAk/YCkWwnDwx8AHSXNjk1XAmenHN7M7JBq8bxRLcRrgRfN7I9xfgLhe7cvpeykelH2rHTn6pxzrvFsUz3ZTEjKMbPKOH0ecKuZbahjs1bBe7LOOVd/3pOth6oEG6dvaspYnHPOtWyt+sYn55xzril5knXOOecS4knWOeecS4gnWeeccy4hnmSdc865hHiSdc455xLiSdY555xLiCdZ55xzLiHNLsnG+qnKoJ0kbdcYMaU5didJe2zlPtpUnWc8l7aSviGpeCv3e2bcX/v4PGPnnHNNpNGe+CTpNOApQu3TLkB3Qtm3A4F/xqo1EB68Pww4W9KzhKLpa9PscgRwFPA/MZmkFpx8wsxWpBz7KGAZoVbs0Wb225R1HQjPKX5A0j2EZxS/RKg92wfYCzik6pnF0QBC+bqfbMW5/jSe4w6E+rLXAl2BHSW9S6h1+18zW5juGPE4dwCTzOzVlMXHE0rlfR0YLKnqmcZd6qjw41yzMHv2bC699FIA3n33Xa688kqmTZvG+++/T15eHnfffTfpPoevXbuWH/zgB3W2c64xNeZjFV8FHgCuISTRzoTi5n8klJGrMoqQcADWmdlaSV8D+pvZY5J6A7fEbdvEB+9PAi6NP38AHq7amaTOwNXASEIx83sl3WlmHwGY2eeSzpVUBnQDfg18DtxAqMnaJaUowFDgF8CuYVZPpsQ93syqHvybybneBrwLHEEoEP8QoYTeDYRatACLJbUFlsd9HgB0M7PP4/r1MdZUlwHtzeyO1IWSHqEOC5ZW0nN8nc2ca3BLUuoYDxkyhDlzQonlwsJCKisr6dGjByUlJRQVFTFr1ixGjBix2T6mTp2aUTvnGlNjJtl/AocB681smqQhQL6ZPR+HN9sA+xF6msdW2/a3wHvAY2b2hqQzgP2Bt4EDYy/0NOA14CUzW5+y7U3ADWb2HoCkq4G7JY00s3WxzWRC9ZwvgMsJ1XPuBI4DzkvZ19cIVXpWEnqouUB53Hb7ep7rj4ALCb3Og4FvA//HVxV6PjWzY+KQ
8oJYhm82MFrS/oTkeiBwkaRJZvYPSU8AhcBvJV0S97PUzE4hpRC8cy3B6tWrWbRoETk5ORQXh6soQ4cOpaysLG3yLC0tzaidc42pUZKspF0IPbUzgd/FknA7AttLOpJwbXgKIcG8X23bk4FsM/tNnO9J6OkdSuglvi1pP0ISKQQeje3aE4qytwe+rB9rZvdLOggolXSGmb1pZvfFHmMhcAFhqPVPcdtfSpoeh3hrS1RVdVwzOddbCUnyWWAFoUeqeLxLCQl8TIzXJO0fE+wBwOHAbmb2nqTtCcPFb8YY1pvZBkm9q8rw1dWDlXQWcBZA2+ydamvqXKOaNWsWBQUF/Pvf/yYnJweA7OxsFi5MfwWloqIio3bONaZGuTEmDs3+CPi6mRXGBHARMNnMhpnZUMIQ77XAkpRNOxKuMZ6WsmwXoAcwn9CL/C/wjbhuCSHxQii8nhXbzJO0RNJsSa8Sktg0wjVTYhH07wDfBf5N6B2fAiwm9Gg3Sfxbe65mNo3Qa94N+ATYmZBkOxLqz+4ObJBU9SFonpkNIfTUOwOPSOpYWxiSnozD2d3qiHeymeWbWX7bzjmZnqZziZsxYwZFRUXk5uZSWRmKY1VWVpKbm5u2fabtnGtMjTlc/B7hZiUkfQwsiNM/BJ4ys18Qip2fK+mbhCHi/dn0GiRm9oKko4H+xN6jmQ2TdK6ZPSHp25KOM7O/EntokiqBdmZ2h6RjgG+Z2fUpsd1AuEZ6BtCXkMSqCqT3B+5O4FwNWAWUAt8H9iHcaDUj7mMfQnL+HSkfhsxslaQ7gbzaAkjpyZZkGnSf7jmUp1wbc66pmBllZWVMmjSJjz76iJkzZ1JcXExpaSljxoxJu01BQUFG7ZxrTI35FY9DCdcdAcpjr24YcA5huDTVzsDE2O5zgGpf1+kJFMftN1YtlNQVWEfoCaY6GSiL0z1I6ZnGa57tCUntJsJdv+cCO8QmDwGf1utMMzvXNkAloQc9C3gGmA2cQujRP2Fmv4vx5VUNF0tqa2Z/MLOXajl+VkpP9hu1tHOuWXr55Zfp3bs3HTt2ZPTo0SxdupS8vDy6du1KQUEBixcvZuzYsZtsk66dc02tMXuyFxJ6pxCSRdWdudsDj6W0k5nNBZCEpK5mtpxws9JEM3ueMNT6kKQNhKQJYbh1fJw/N26/A+Fu4+fN7J04LHwQoddaJR/4t5ndGpP03oSENw74l5n9M+5rFHAxoQe6EWhLSM4/iOdwvKRbzOzODM+1LSEZ9yYMEz8JvA5MiOsHSvoxYbi66sann1d7Tat/P0EAZjZ8k4UhUft3Zl2L0b9/f6ZPnw5AVlYWJSWbDsjsscceTJw4cZNl6do519Qa68anzoReW1Xv6w0zqxpOzWfT77h2ktTOzDYAfwMejkliMeH6KoTk9n0z+0zSM1XLzGxctUNfC7xoZlVDvxMICTK1F7gCuF/SqYS7h68ELgGKgZ9Kmgo8bGb3Afc14Lm+B5xoZvNiL33vqu+7SvofYJWZ3RbnRwGY2TUpxzmTkKR/mXL4LEntU++ujq/dc4ResnPOuUYks8b/ZoekTma2Ziu23x5YaQ0YvKRvAW/Gu3n3NrO34vIOQCczq9zC/W7Vuday387AWjP7IoO225nZqrra5efnW3l5eV3NnHPOpZD0ipnlp1vXmMPFX9rapJP6NKeGYmb/Spl+K2X6czZ/4EN99tvgCTbud3U92taZYJ1zzjU8v07nnHPOJcSTrHPOOZcQT7LOOedcQjzJOueccwnxJOucc84lxJOsc845lxBPss4551xCPMk655xzCfEk65xzziXEk6xzzjmXEE+yzjnnXEI8yTrnnHMJaZIqPK55krQCWNjUcWyBXGBZUwdRTx5z42mJcbfEmKFlxt0QMe9uZjulW9EkVXhcs7WwpnJNzZmk8pYWt8fceFpi3C0xZmiZcScdsw8XO+eccwnxJOucc84lxJOsSzW5qQPYQi0xbo+58bTEuFtizNAy4040Zr/x
yTnnnEuI92Sdc865hHiSdc455xLiSdYhqaOkEknzJN0jSU0dU00UTJH0gqTpkrq0oNjHSHpSUq6kZyUtkHR1U8dVG0njYqyPSerW3OOWtJ2khyU9J+n3LeG1ltRe0ow4vdl7sTm+P6vFXP092a45xgybxp2ybIykJ+N0g/9/8STrAE4EPjCzvsCOwPAmjqc2A4F2ZnYIkA2cTguIXdLuwKlx9n+AR4C+wFGS9m6isGolaU+gt5kNBh4Drqf5xz0aeMHMBgK9gT/RjGOW1Al4ha/+36Z7Lzar92eamKu/J0fQzGKGtHFXf19CAu9NT7IOYCgwK06XAoc3YSx1+Ri4IU5/DlxGy4j9BmBCnB4KzDKzL4Cnab4xFwA7SnoGGAzsQfOPex3QOfacOgLfoRnHbGZrzCwP+CAuSvdebFbvzzQxV39PQjOLGdLGDZu+LyGB96YnWQfwNaAyTv8X6NqEsdTKzN42s5ckHQN0IHwybdaxSxoFzAP+GRe1lNd7J+ATMzsU6AH0p/nHfR9wFPAv4E1CnM095lTp/m806/8vad6TT9DMY4a070tIIG5Psg7Ccztz4nQOzfzZo5K+B1wAjAT+Q/OPvYjQK7wfOJDwrNTmHjOEPzJVz7J+B1hC8497AvBHM9uX8Adyb5p/zKnSvReb/fsz9T1pZhtpATFT7X0p6VwSiNuTrAN4inAdBcJwSVkTxlIrSbsAPwcKzWwFLSB2MxtlZoOA4wk975uBEZLaAIfRDGOOXgEOitO9CAm3uce9PbA2Tq8Dnqf5x5wq3f/nZv1/PM17Epp5zLD5+9LMJhHjbsj/L55kHcC9QHdJ84HlhP9ozdUpwK7AE5LmAO1pObFXuRH4LjAfeMTMFjVxPGmZ2fPAMkkvExLsyTT/uG8GfirpeaATcAzNP+ZU6d6Lzf39ucl7UtLpNP+Ya9Lg701/4pNzzjmXEO/JOueccwnxJOucc84lxJOsc845lxBPss4551xCPMk65xIl6TJJC+Odp3Mknd/UMTnXWNo1dQDOuW3CFWY2tamDcK6xeU/WOdcsxMotMyTNlfRASjWX+yU9Hyu8dJKUJen/xUo790nqELefLekqSY/H+Z0lPS7pRUkTaj+6c8nwJOucawyXxCR4Sy1tegNmZt8Bbge6AGcB88xsADAdyAN+DPwzVtp5Gzgtbn8w8LKZHRnnJwD3m9nBwNGSvtbgZ+VcHTzJOucaw2/MbIiZ/ayWNv8AFsR6n0cBq4B9gZfi+juBcmA/wuMSib/3i9NvmNnfU/a3D+HpT7MJCfvrDXEiztWHJ1nnXHNxAKEW7EhCEYVDCZV0Do7rLyH0Wt8ADonLDonzACur7W8hMN7MhgATgU+TCty5mniSdc41F4uB8yS9ROh1lgOTgQPic6r7AVMJQ8m9JT1HqLJzVw37uxr4uaQXgGHAR8mG79zm/NnFzjnnXEK8J+ucc84lxJOsc845lxBPss4551xCPMk655xzCfEk65xzziXEk6xzzjmXkP8P/1WbdJK2h0wAAAAASUVORK5CYII=",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Fill missing values in df_X_qs_remove using the 'mean' strategy.\n",
    "# NOTE(review): fill_value is defined in an earlier cell; presumably column-wise\n",
    "# mean imputation -- confirm against its definition.\n",
    "df_X_qs_remove_fill = fill_value(df_X_qs_remove, 'mean')\n",
    "# Run the XGBoost-based selection helper on the filled features and target y.\n",
    "# `a` is the object written to Excel by the following cell.\n",
    "a = select_xgboost(df_X_qs_remove_fill, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 109,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the XGBoost selection result `a` to an Excel workbook.\n",
    "# NOTE(review): absolute, machine-specific output path -- consider deriving it\n",
    "# from a configurable data directory so the notebook runs elsewhere.\n",
    "a.to_excel('/home/yx/3090/project/P_prediction/Data/肺部并发症预测/xgb_new.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {},
   "outputs": [],
   "source": [
    "def StandardScaler(data):\n",
    "    \"\"\"Z-score standardise ``data``: subtract the mean and divide by the std.\n",
    "\n",
    "    The statistics come from the input's own ``mean()`` / ``std()`` methods, so a\n",
    "    pandas DataFrame is scaled column by column and a Series as a whole.\n",
    "    Despite the name, this is a plain function, not scikit-learn's\n",
    "    ``StandardScaler`` class.\n",
    "\n",
    "    Args:\n",
    "        data: object exposing ``mean()`` and ``std()`` and supporting arithmetic\n",
    "            with their results (e.g. a pandas DataFrame or Series).\n",
    "\n",
    "    Returns:\n",
    "        A new object of the same shape holding ``(data - mean) / std``.\n",
    "        Entries whose spread is exactly zero (constant columns) come back as 0\n",
    "        instead of NaN/inf.\n",
    "    \"\"\"\n",
    "    center = data.mean()\n",
    "    spread = data.std()\n",
    "    # A constant column has zero spread; dividing by it would produce NaN/inf\n",
    "    # that propagates into everything downstream. Dividing by 1 instead leaves\n",
    "    # the already-centred values at 0.\n",
    "    if isinstance(spread, pd.Series):\n",
    "        spread = spread.replace(0, 1)\n",
    "    elif spread == 0:\n",
    "        spread = 1\n",
    "    return (data - center) / spread"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [],
   "source": [
    "def select_lasso(x, y, alpha=0.01, max_iter=100000):\n",
    "    \"\"\"Fit a Lasso (L1-regularised linear) model and return its coefficients.\n",
    "\n",
    "    Args:\n",
    "        x: feature matrix handed to ``Lasso.fit``.\n",
    "        y: target values handed to ``Lasso.fit``.\n",
    "        alpha: regularisation strength passed to ``Lasso``. Defaults to 0.01,\n",
    "            the value that used to be hard-coded here.\n",
    "        max_iter: iteration cap passed to ``Lasso``. Defaults to 100000, the\n",
    "            value that used to be hard-coded here.\n",
    "\n",
    "    Returns:\n",
    "        The ``coef_`` attribute of the fitted model.\n",
    "    \"\"\"\n",
    "    model = Lasso(alpha=alpha, max_iter=max_iter)\n",
    "    model.fit(x, y)\n",
    "    # Coefficients of the fitted model\n",
    "    return model.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fill missing values using the 'mean' strategy (same call as in the XGBoost cell).\n",
    "# NOTE(review): fill_value is defined in an earlier cell -- confirm its exact behaviour.\n",
    "df_X_qs_remove_fill = fill_value(df_X_qs_remove, 'mean')\n",
    "# Standardise the filled features with the notebook's own StandardScaler function.\n",
    "data = StandardScaler(df_X_qs_remove_fill)\n",
    "# Lasso coefficients for the standardised features against target y.\n",
    "conf = select_lasso(data, y)\n",
    "# Label the coefficients with the feature column names; the values land in column 0\n",
    "# (assumes conf is one-dimensional -- TODO confirm).\n",
    "result = pd.DataFrame(conf, index=df_X_qs_remove_fill.columns)\n",
    "# Keep only the rows whose coefficient is non-zero and write them to Excel.\n",
    "# NOTE(review): absolute, machine-specific output path -- consider a configurable data directory.\n",
    "result[result[0] != 0].to_excel('/home/yx/3090/project/P_prediction/Data/肺部并发症预测/lasso_new.xlsx')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.6.13 ('P_prediction')",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "676c8c2553a8289d770cd55f8a54cd76a7646969c2ee13a0fd2fdbb12048cb4a"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
